I'm going to see how scikit-learn's dimensionality reduction methods work on our data.


In [117]:
from pearce.emulator import OriginalRecipe, ExtraCrispy, SpicyBuffalo
from pearce.mocks import cat_dict
import numpy as np
from os import path

In [118]:
# Plotting setup: matplotlib with inline rendering, styled by seaborn.
import matplotlib
# Non-interactive 'Agg' backend left disabled; it would have to be selected
# before pyplot is imported.
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
# Apply seaborn's default styling to all subsequent matplotlib figures.
sns.set()

In [119]:
# Input files and emulator options.
# NOTE(review): these are absolute, cluster-specific paths; the notebook will
# only run where /scratch/users/swmclau2 is mounted.
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
# test_file is not used in the visible part of this notebook.
test_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_test//PearceRedMagicXiCosmoFixedNd_Test.hdf5'
# Passed as `method=` when the emulator is constructed below.
em_method = 'gp'
# split_method is not used in the visible part of this notebook.
split_method = 'random'

In [120]:
# presumably `a` is the cosmological scale factor and `z` the matching
# redshift (z = 1/a - 1) -- TODO confirm. With a = 1.0 this gives z = 0.0.
a = 1.0
z = 1.0/a - 1.0

In [121]:
# Passed to both the emulator constructor and emu.get_data below.
# presumably pins z and the bin r to single values -- TODO confirm.
# A disabled extra entry, 'cosmo': 0, was left here by the author.
fixed_params = {'z':z, 'r':24.06822623}

In [122]:
# Seed NumPy's global RNG before the emulator is built.
np.random.seed(0)

# Build the emulator from the training file with the settings chosen above.
emu = OriginalRecipe(
    training_file,
    method=em_method,
    fixed_params=fixed_params,
    custom_mean_function='linear',
    downsample_factor=0.02
)
# Extra keyword left disabled by the author:
#   hyperparams = {'n_estimators': 500,
#                  'max_depth': 5}

In [123]:
# NOTE(review): identical to the fixed_params assignment made before the
# emulator was built; this re-definition is redundant and could be removed.
fixed_params = {'z':z, 'r':24.06822623}

In [124]:
# get_data returns four values; only the first two (x and y) are kept.
# presumably x holds the input parameters and y the target values -- TODO confirm.
# NOTE(review): binding `_` shadows IPython's last-output variable in this kernel.
x, y, _, _ = emu.get_data(training_file,fixed_params)

In [125]:
# Show the dimensions of x. The call form prints the same text on Python 2
# and also parses on Python 3, where the print statement is a SyntaxError.
print(x.shape)


(40000, 11)

In [126]:
from sklearn.manifold import Isomap, LocallyLinearEmbedding, SpectralEmbedding
from sklearn.decomposition import PCA
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; train_test_split now lives in sklearn.model_selection. Fall back to
# the old module only on installs that predate it.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import scale
# Standardize each column of x (zero mean, unit variance).
# NOTE: this overwrites x, so the unscaled inputs are no longer available
# without re-running the get_data cell.
x = scale(x)

In [127]:
# Keep a random 25% of the rows as the working subset; the remainder is discarded.
# NOTE(review): no random_state is passed, so the split depends on the state of
# NumPy's global RNG at the time this cell runs.
x_train, _, y_train, _ = train_test_split(x,y,  train_size = 0.25 )

In [146]:
# Choose the embedding method: Isomap down to 4 components, with
# 100 neighbors per point.
embedding = Isomap(n_components=4, n_neighbors=100)
# Alternative embeddings the author tried; uncomment exactly one to swap it in.
#embedding = PCA(n_components=x.shape[1])
#embedding = LocallyLinearEmbedding(n_components=4, n_neighbors = 20)
#embedding = SpectralEmbedding(n_components = 4, n_neighbors = 10)

In [ ]:
# Fit the chosen embedding on the training subset and project it in one step.
# NOTE(review): likely slow on this many rows with a manifold method -- consider timing it.
x_embeded = embedding.fit_transform(x_train)

In [ ]:
# Display the dimensions of the embedded data as a sanity check.
x_embeded.shape

In [ ]:
# Min-max normalize y_train to [0, 1] for use as a scatter-plot color value:
# shift so the minimum is zero, then divide by the largest shifted value.
y_shifted = y_train - y_train.min()
y_color = y_shifted / y_shifted.max()

In [ ]:
# Build a 12-color cubehelix palette, preview it, and make it seaborn's
# current palette.
# NOTE(review): the scatter calls below pass numeric `c=` values, which
# matplotlib maps through a colormap -- verify this palette actually affects them.
pal = sns.cubehelix_palette(12)
sns.palplot(pal)
sns.set_palette(pal)

In [ ]:
# Embedding component 0 vs. component 1, colored by the normalized y value.
# Axis labels tell this plot apart from the other component pairs; the
# trailing semicolon suppresses the text repr of the last call.
plt.scatter(x_embeded[:, 0], x_embeded[:, 1], c = y_color, alpha = 0.3)
plt.xlabel('Embedding component 0')
plt.ylabel('Embedding component 1');

In [ ]:
# Embedding component 1 vs. component 2, colored by the normalized y value.
# Axis labels tell this plot apart from the other component pairs; the
# trailing semicolon suppresses the text repr of the last call.
plt.scatter(x_embeded[:, 1], x_embeded[:, 2], c = y_color, alpha = 0.3)
plt.xlabel('Embedding component 1')
plt.ylabel('Embedding component 2');

In [ ]:
# Embedding component 0 vs. component 2, colored by the normalized y value.
# Axis labels tell this plot apart from the other component pairs; the
# trailing semicolon suppresses the text repr of the last call.
plt.scatter(x_embeded[:, 0], x_embeded[:, 2], c = y_color, alpha = 0.3)
plt.xlabel('Embedding component 0')
plt.ylabel('Embedding component 2');

In [ ]:
# Plot y_train against each embedding component in turn, one figure per
# component. `range` is used instead of the Python-2-only `xrange`, which is a
# NameError on Python 3; the loop only covers the few embedding columns, so
# building a list on Python 2 costs nothing.
for i in range(x_embeded.shape[1]):
    plt.scatter(x_embeded[:,i], y_train, c = y_color)
    # Label the axes so the per-component figures can be told apart.
    plt.xlabel('Embedding component %d' % i)
    plt.ylabel('y_train')
    plt.show()

In [ ]:


In [ ]: